{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [10]: df = pd.DataFrame({'key1' : ['a', 'a', 'b', 'b', 'a'],\n",
    "....: 'key2' : ['one', 'two', 'one', 'two', 'one'],\n",
    "....: 'data1' : np.random.randn(5),\n",
    "....: 'data2' : np.random.randn(5)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.554373</td>\n",
       "      <td>-0.287962</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.911964</td>\n",
       "      <td>0.315323</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.203580</td>\n",
       "      <td>-1.760443</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.299927</td>\n",
       "      <td>2.560293</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.218899</td>\n",
       "      <td>0.236114</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  0.554373 -0.287962    a  one\n",
       "1 -0.911964  0.315323    a  two\n",
       "2 -1.203580 -1.760443    b  one\n",
       "3  0.299927  2.560293    b  two\n",
       "4  1.218899  0.236114    a  one"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = df['data1'].groupby(df['key1'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.SeriesGroupBy object at 0x00000227B31813C8>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1\n",
       "a    0.287103\n",
       "b   -0.451826\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "means = df['data1'].groupby([df['key1'],df['key2']]).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1  key2\n",
       "a     one     0.886636\n",
       "      two    -0.911964\n",
       "b     one    -1.203580\n",
       "      two     0.299927\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>key2</th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.886636</td>\n",
       "      <td>-0.911964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1.203580</td>\n",
       "      <td>0.299927</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "key2       one       two\n",
       "key1                    \n",
       "a     0.886636 -0.911964\n",
       "b    -1.203580  0.299927"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [18]: states = np.array(['Ohio', 'California', 'California', 'Ohio',\n",
    "'Ohio'])\n",
    "In [19]: years = np.array([2005, 2005, 2006, 2005, 2006])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'], dtype='<U10')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "states"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "California  2005   -0.911964\n",
       "            2006   -1.203580\n",
       "Ohio        2005    0.854300\n",
       "            2006    1.218899\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.data1.groupby([states,years]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.287103</td>\n",
       "      <td>0.087825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-0.451826</td>\n",
       "      <td>0.399925</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1     data2\n",
       "key1                    \n",
       "a     0.287103  0.087825\n",
       "b    -0.451826  0.399925"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('key1').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>0.886636</td>\n",
       "      <td>-0.025924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-0.911964</td>\n",
       "      <td>0.315323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>-1.203580</td>\n",
       "      <td>-1.760443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.299927</td>\n",
       "      <td>2.560293</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              data1     data2\n",
       "key1 key2                    \n",
       "a    one   0.886636 -0.025924\n",
       "     two  -0.911964  0.315323\n",
       "b    one  -1.203580 -1.760443\n",
       "     two   0.299927  2.560293"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['key1','key2']).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1  key2\n",
       "a     one     2\n",
       "      two     1\n",
       "b     one     1\n",
       "      two     1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['key1','key2']).size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a\n",
      "      data1     data2 key1 key2\n",
      "0  0.554373 -0.287962    a  one\n",
      "1 -0.911964  0.315323    a  two\n",
      "4  1.218899  0.236114    a  one\n",
      "b\n",
      "      data1     data2 key1 key2\n",
      "2 -1.203580 -1.760443    b  one\n",
      "3  0.299927  2.560293    b  two\n"
     ]
    }
   ],
   "source": [
    "for name,group in df.groupby('key1'):\n",
    "    print(name)\n",
    "    print(group)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('a', 'one')\n",
      "      data1     data2 key1 key2\n",
      "0  0.554373 -0.287962    a  one\n",
      "4  1.218899  0.236114    a  one\n",
      "('a', 'two')\n",
      "      data1     data2 key1 key2\n",
      "1 -0.911964  0.315323    a  two\n",
      "('b', 'one')\n",
      "     data1     data2 key1 key2\n",
      "2 -1.20358 -1.760443    b  one\n",
      "('b', 'two')\n",
      "      data1     data2 key1 key2\n",
      "3  0.299927  2.560293    b  two\n"
     ]
    }
   ],
   "source": [
    "for name,group in df.groupby(['key1','key2']):\n",
    "    print(name)\n",
    "    print(group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "pieces = dict(list(df.groupby('key1')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.203580</td>\n",
       "      <td>-1.760443</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.299927</td>\n",
       "      <td>2.560293</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "2 -1.203580 -1.760443    b  one\n",
       "3  0.299927  2.560293    b  two"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pieces['b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "data1    float64\n",
       "data2    float64\n",
       "key1      object\n",
       "key2      object\n",
       "dtype: object"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = df.groupby(df.dtypes,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "float64\n",
      "      data1     data2\n",
      "0  0.554373 -0.287962\n",
      "1 -0.911964  0.315323\n",
      "2 -1.203580 -1.760443\n",
      "3  0.299927  2.560293\n",
      "4  1.218899  0.236114\n",
      "object\n",
      "  key1 key2\n",
      "0    a  one\n",
      "1    a  two\n",
      "2    b  one\n",
      "3    b  two\n",
      "4    a  one\n"
     ]
    }
   ],
   "source": [
    "for dtype,group in grouped:\n",
    "    print(dtype)\n",
    "    print(group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.SeriesGroupBy object at 0x00000227B87A3908>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby('key1')['data1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">a</th>\n",
       "      <th>one</th>\n",
       "      <td>-0.025924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>0.315323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">b</th>\n",
       "      <th>one</th>\n",
       "      <td>-1.760443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>2.560293</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              data2\n",
       "key1 key2          \n",
       "a    one  -0.025924\n",
       "     two   0.315323\n",
       "b    one  -1.760443\n",
       "     two   2.560293"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['key1','key2'])[['data2']].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "s_grouped = df.groupby(['key1','key2'])['data2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.SeriesGroupBy object at 0x00000227B4F61A20>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s_grouped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1  key2\n",
       "a     one    -0.025924\n",
       "      two     0.315323\n",
       "b     one    -1.760443\n",
       "      two     2.560293\n",
       "Name: data2, dtype: float64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s_grouped.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Joe</th>\n",
       "      <td>0.228064</td>\n",
       "      <td>1.592918</td>\n",
       "      <td>-1.371554</td>\n",
       "      <td>0.274566</td>\n",
       "      <td>0.162096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Steve</th>\n",
       "      <td>-0.602570</td>\n",
       "      <td>0.836633</td>\n",
       "      <td>1.170803</td>\n",
       "      <td>1.016493</td>\n",
       "      <td>-0.200838</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wes</th>\n",
       "      <td>0.709385</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.432945</td>\n",
       "      <td>0.015160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jim</th>\n",
       "      <td>-1.468403</td>\n",
       "      <td>-2.071582</td>\n",
       "      <td>0.233385</td>\n",
       "      <td>0.795268</td>\n",
       "      <td>0.555620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Travis</th>\n",
       "      <td>1.479975</td>\n",
       "      <td>1.749793</td>\n",
       "      <td>2.291599</td>\n",
       "      <td>-0.095310</td>\n",
       "      <td>0.834088</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               a         b         c         d         e\n",
       "Joe     0.228064  1.592918 -1.371554  0.274566  0.162096\n",
       "Steve  -0.602570  0.836633  1.170803  1.016493 -0.200838\n",
       "Wes     0.709385       NaN       NaN  0.432945  0.015160\n",
       "Jim    -1.468403 -2.071582  0.233385  0.795268  0.555620\n",
       "Travis  1.479975  1.749793  2.291599 -0.095310  0.834088"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "In [35]: people = pd.DataFrame(np.random.randn(5, 5),\n",
    "....: columns=['a', 'b', 'c', 'd', 'e'],\n",
    "....: index=['Joe', 'Steve', 'Wes', 'Jim',\n",
    "'Travis'])\n",
    "In [36]: people.iloc[2:3, [1, 2]] = np.nan # Add a few NA values\n",
    "In [37]: people"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [38]: mapping = {'a': 'red', 'b': 'red', 'c': 'blue',\n",
    "....: 'd': 'blue', 'e': 'red', 'f' : 'orange'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a': 'red', 'b': 'red', 'c': 'blue', 'd': 'blue', 'e': 'red', 'f': 'orange'}"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "by_column = people.groupby(mapping,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>blue</th>\n",
       "      <th>red</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Joe</th>\n",
       "      <td>-1.096988</td>\n",
       "      <td>1.983077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Steve</th>\n",
       "      <td>2.187296</td>\n",
       "      <td>0.033225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wes</th>\n",
       "      <td>0.432945</td>\n",
       "      <td>0.724546</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jim</th>\n",
       "      <td>1.028654</td>\n",
       "      <td>-2.984365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Travis</th>\n",
       "      <td>2.196289</td>\n",
       "      <td>4.063856</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            blue       red\n",
       "Joe    -1.096988  1.983077\n",
       "Steve   2.187296  0.033225\n",
       "Wes     0.432945  0.724546\n",
       "Jim     1.028654 -2.984365\n",
       "Travis  2.196289  4.063856"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "by_column.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "map_series = pd.Series(mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a       red\n",
       "b       red\n",
       "c      blue\n",
       "d      blue\n",
       "e       red\n",
       "f    orange\n",
       "dtype: object"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "map_series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>blue</th>\n",
       "      <th>red</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Joe</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Steve</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wes</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jim</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Travis</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        blue  red\n",
       "Joe        2    3\n",
       "Steve      2    3\n",
       "Wes        1    2\n",
       "Jim        2    3\n",
       "Travis     2    3"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people.groupby(map_series,axis=1).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Joe</th>\n",
       "      <td>0.228064</td>\n",
       "      <td>1.592918</td>\n",
       "      <td>-1.371554</td>\n",
       "      <td>0.274566</td>\n",
       "      <td>0.162096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Steve</th>\n",
       "      <td>-0.602570</td>\n",
       "      <td>0.836633</td>\n",
       "      <td>1.170803</td>\n",
       "      <td>1.016493</td>\n",
       "      <td>-0.200838</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wes</th>\n",
       "      <td>0.709385</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.432945</td>\n",
       "      <td>0.015160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jim</th>\n",
       "      <td>-1.468403</td>\n",
       "      <td>-2.071582</td>\n",
       "      <td>0.233385</td>\n",
       "      <td>0.795268</td>\n",
       "      <td>0.555620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Travis</th>\n",
       "      <td>1.479975</td>\n",
       "      <td>1.749793</td>\n",
       "      <td>2.291599</td>\n",
       "      <td>-0.095310</td>\n",
       "      <td>0.834088</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               a         b         c         d         e\n",
       "Joe     0.228064  1.592918 -1.371554  0.274566  0.162096\n",
       "Steve  -0.602570  0.836633  1.170803  1.016493 -0.200838\n",
       "Wes     0.709385       NaN       NaN  0.432945  0.015160\n",
       "Jim    -1.468403 -2.071582  0.233385  0.795268  0.555620\n",
       "Travis  1.479975  1.749793  2.291599 -0.095310  0.834088"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.530954</td>\n",
       "      <td>-0.478664</td>\n",
       "      <td>-1.138169</td>\n",
       "      <td>1.502779</td>\n",
       "      <td>0.732877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>-0.602570</td>\n",
       "      <td>0.836633</td>\n",
       "      <td>1.170803</td>\n",
       "      <td>1.016493</td>\n",
       "      <td>-0.200838</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.479975</td>\n",
       "      <td>1.749793</td>\n",
       "      <td>2.291599</td>\n",
       "      <td>-0.095310</td>\n",
       "      <td>0.834088</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         c         d         e\n",
       "3 -0.530954 -0.478664 -1.138169  1.502779  0.732877\n",
       "5 -0.602570  0.836633  1.170803  1.016493 -0.200838\n",
       "6  1.479975  1.749793  2.291599 -0.095310  0.834088"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people.groupby(len).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>-0.374506</td>\n",
       "      <td>2.429551</td>\n",
       "      <td>-0.200751</td>\n",
       "      <td>1.291059</td>\n",
       "      <td>-0.038742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>m</th>\n",
       "      <td>-1.468403</td>\n",
       "      <td>-2.071582</td>\n",
       "      <td>0.233385</td>\n",
       "      <td>0.795268</td>\n",
       "      <td>0.555620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s</th>\n",
       "      <td>2.189360</td>\n",
       "      <td>1.749793</td>\n",
       "      <td>2.291599</td>\n",
       "      <td>0.337635</td>\n",
       "      <td>0.849249</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         c         d         e\n",
       "e -0.374506  2.429551 -0.200751  1.291059 -0.038742\n",
       "m -1.468403 -2.071582  0.233385  0.795268  0.555620\n",
       "s  2.189360  1.749793  2.291599  0.337635  0.849249"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people.groupby(lambda x: x[-1]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    " key_list = ['one', 'one', 'one', 'two', 'two']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">3</th>\n",
       "      <th>one</th>\n",
       "      <td>0.228064</td>\n",
       "      <td>1.592918</td>\n",
       "      <td>-1.371554</td>\n",
       "      <td>0.274566</td>\n",
       "      <td>0.015160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <td>-1.468403</td>\n",
       "      <td>-2.071582</td>\n",
       "      <td>0.233385</td>\n",
       "      <td>0.795268</td>\n",
       "      <td>0.555620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <th>one</th>\n",
       "      <td>-0.602570</td>\n",
       "      <td>0.836633</td>\n",
       "      <td>1.170803</td>\n",
       "      <td>1.016493</td>\n",
       "      <td>-0.200838</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <th>two</th>\n",
       "      <td>1.479975</td>\n",
       "      <td>1.749793</td>\n",
       "      <td>2.291599</td>\n",
       "      <td>-0.095310</td>\n",
       "      <td>0.834088</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              a         b         c         d         e\n",
       "3 one  0.228064  1.592918 -1.371554  0.274566  0.015160\n",
       "  two -1.468403 -2.071582  0.233385  0.795268  0.555620\n",
       "5 one -0.602570  0.836633  1.170803  1.016493 -0.200838\n",
       "6 two  1.479975  1.749793  2.291599 -0.095310  0.834088"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people.groupby([len,key_list]).min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [47]: columns = pd.MultiIndex.from_arrays([['US', 'US', 'US', 'JP',\n",
    "'JP'],\n",
    "....: [1, 3, 5, 1, 3]],\n",
    "....: names=['cty', 'tenor'])\n",
    "In [48]: hier_df = pd.DataFrame(np.random.randn(4, 5), columns=columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>cty</th>\n",
       "      <th colspan=\"3\" halign=\"left\">US</th>\n",
       "      <th colspan=\"2\" halign=\"left\">JP</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tenor</th>\n",
       "      <th>1</th>\n",
       "      <th>3</th>\n",
       "      <th>5</th>\n",
       "      <th>1</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.401821</td>\n",
       "      <td>0.671074</td>\n",
       "      <td>-0.057992</td>\n",
       "      <td>1.685946</td>\n",
       "      <td>0.132040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.208252</td>\n",
       "      <td>0.867373</td>\n",
       "      <td>0.436066</td>\n",
       "      <td>-0.271943</td>\n",
       "      <td>-0.957871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.519773</td>\n",
       "      <td>0.941798</td>\n",
       "      <td>-0.123998</td>\n",
       "      <td>-0.823062</td>\n",
       "      <td>0.340399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.569490</td>\n",
       "      <td>1.163800</td>\n",
       "      <td>-1.230854</td>\n",
       "      <td>1.740729</td>\n",
       "      <td>0.327270</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "cty          US                            JP          \n",
       "tenor         1         3         5         1         3\n",
       "0     -0.401821  0.671074 -0.057992  1.685946  0.132040\n",
       "1      1.208252  0.867373  0.436066 -0.271943 -0.957871\n",
       "2     -0.519773  0.941798 -0.123998 -0.823062  0.340399\n",
       "3     -1.569490  1.163800 -1.230854  1.740729  0.327270"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hier_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>cty</th>\n",
       "      <th>JP</th>\n",
       "      <th>US</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "cty  JP  US\n",
       "0     2   3\n",
       "1     2   3\n",
       "2     2   3\n",
       "3     2   3"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hier_df.groupby(level=0,axis=1).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.554373</td>\n",
       "      <td>-0.287962</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.911964</td>\n",
       "      <td>0.315323</td>\n",
       "      <td>a</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-1.203580</td>\n",
       "      <td>-1.760443</td>\n",
       "      <td>b</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.299927</td>\n",
       "      <td>2.560293</td>\n",
       "      <td>b</td>\n",
       "      <td>two</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.218899</td>\n",
       "      <td>0.236114</td>\n",
       "      <td>a</td>\n",
       "      <td>one</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      data1     data2 key1 key2\n",
       "0  0.554373 -0.287962    a  one\n",
       "1 -0.911964  0.315323    a  two\n",
       "2 -1.203580 -1.760443    b  one\n",
       "3  0.299927  2.560293    b  two\n",
       "4  1.218899  0.236114    a  one"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = df.groupby('key1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "key1     \n",
       "a     0.1   -0.618696\n",
       "      0.2   -0.325429\n",
       "      0.3   -0.032162\n",
       "      0.4    0.261106\n",
       "      0.5    0.554373\n",
       "      0.6    0.687278\n",
       "      0.7    0.820183\n",
       "      0.8    0.953089\n",
       "      0.9    1.085994\n",
       "b     0.1   -1.053229\n",
       "      0.2   -0.902879\n",
       "      0.3   -0.752528\n",
       "      0.4   -0.602177\n",
       "      0.5   -0.451826\n",
       "      0.6   -0.301476\n",
       "      0.7   -0.151125\n",
       "      0.8   -0.000774\n",
       "      0.9    0.149576\n",
       "Name: data1, dtype: float64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.data1.quantile([i/10 for i in range(1,10)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "def peak2peak(arr):\n",
    "    return arr.max()-arr.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data1</th>\n",
       "      <th>data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2.130863</td>\n",
       "      <td>0.603286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>1.503507</td>\n",
       "      <td>4.320737</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data1     data2\n",
       "key1                    \n",
       "a     2.130863  0.603286\n",
       "b     1.503507  4.320737"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.agg(peak2peak)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">data1</th>\n",
       "      <th colspan=\"8\" halign=\"left\">data2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>3.0</td>\n",
       "      <td>0.287103</td>\n",
       "      <td>1.090284</td>\n",
       "      <td>-0.911964</td>\n",
       "      <td>-0.178795</td>\n",
       "      <td>0.554373</td>\n",
       "      <td>0.886636</td>\n",
       "      <td>1.218899</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.087825</td>\n",
       "      <td>0.327842</td>\n",
       "      <td>-0.287962</td>\n",
       "      <td>-0.025924</td>\n",
       "      <td>0.236114</td>\n",
       "      <td>0.275719</td>\n",
       "      <td>0.315323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2.0</td>\n",
       "      <td>-0.451826</td>\n",
       "      <td>1.063140</td>\n",
       "      <td>-1.203580</td>\n",
       "      <td>-0.827703</td>\n",
       "      <td>-0.451826</td>\n",
       "      <td>-0.075950</td>\n",
       "      <td>0.299927</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.399925</td>\n",
       "      <td>3.055222</td>\n",
       "      <td>-1.760443</td>\n",
       "      <td>-0.680259</td>\n",
       "      <td>0.399925</td>\n",
       "      <td>1.480109</td>\n",
       "      <td>2.560293</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     data1                                                              \\\n",
       "     count      mean       std       min       25%       50%       75%   \n",
       "key1                                                                     \n",
       "a      3.0  0.287103  1.090284 -0.911964 -0.178795  0.554373  0.886636   \n",
       "b      2.0 -0.451826  1.063140 -1.203580 -0.827703 -0.451826 -0.075950   \n",
       "\n",
       "               data2                                                    \\\n",
       "           max count      mean       std       min       25%       50%   \n",
       "key1                                                                     \n",
       "a     1.218899   3.0  0.087825  0.327842 -0.287962 -0.025924  0.236114   \n",
       "b     0.299927   2.0  0.399925  3.055222 -1.760443 -0.680259  0.399925   \n",
       "\n",
       "                          \n",
       "           75%       max  \n",
       "key1                      \n",
       "a     0.275719  0.315323  \n",
       "b     1.480109  2.560293  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.059447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.160542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.166587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.139780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.146808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>25.29</td>\n",
       "      <td>4.71</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.186240</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip smoker  day    time  size   tip_pct\n",
       "0       16.99  1.01     No  Sun  Dinner     2  0.059447\n",
       "1       10.34  1.66     No  Sun  Dinner     3  0.160542\n",
       "2       21.01  3.50     No  Sun  Dinner     3  0.166587\n",
       "3       23.68  3.31     No  Sun  Dinner     2  0.139780\n",
       "4       24.59  3.61     No  Sun  Dinner     4  0.146808\n",
       "5       25.29  4.71     No  Sun  Dinner     4  0.186240"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "In [57]: tips = pd.read_csv('examples/tips.csv')\n",
    "# Add tip percentage of total bill\n",
    "In [58]: tips['tip_pct'] = tips['tip'] / tips['total_bill']\n",
    "In [59]: tips[:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = tips.groupby(['day','smoker'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped_pct = grouped['tip_pct']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "day   smoker\n",
       "Fri   No        0.151650\n",
       "      Yes       0.174783\n",
       "Sat   No        0.158048\n",
       "      Yes       0.147906\n",
       "Sun   No        0.160113\n",
       "      Yes       0.187250\n",
       "Thur  No        0.160298\n",
       "      Yes       0.163863\n",
       "Name: tip_pct, dtype: float64"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped_pct.agg('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>peak2peak</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.028123</td>\n",
       "      <td>0.067349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.051293</td>\n",
       "      <td>0.159925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.039767</td>\n",
       "      <td>0.235193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.061375</td>\n",
       "      <td>0.290095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.042347</td>\n",
       "      <td>0.193226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.154134</td>\n",
       "      <td>0.644685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.038774</td>\n",
       "      <td>0.193350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.039389</td>\n",
       "      <td>0.151240</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 mean       std  peak2peak\n",
       "day  smoker                               \n",
       "Fri  No      0.151650  0.028123   0.067349\n",
       "     Yes     0.174783  0.051293   0.159925\n",
       "Sat  No      0.158048  0.039767   0.235193\n",
       "     Yes     0.147906  0.061375   0.290095\n",
       "Sun  No      0.160113  0.042347   0.193226\n",
       "     Yes     0.187250  0.154134   0.644685\n",
       "Thur No      0.160298  0.038774   0.193350\n",
       "     Yes     0.163863  0.039389   0.151240"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped_pct.agg(['mean','std',peak2peak])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>foo</th>\n",
       "      <th>bar</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.028123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.051293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.039767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.061375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.042347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.154134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.038774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.039389</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  foo       bar\n",
       "day  smoker                    \n",
       "Fri  No      0.151650  0.028123\n",
       "     Yes     0.174783  0.051293\n",
       "Sat  No      0.158048  0.039767\n",
       "     Yes     0.147906  0.061375\n",
       "Sun  No      0.160113  0.042347\n",
       "     Yes     0.187250  0.154134\n",
       "Thur No      0.160298  0.038774\n",
       "     Yes     0.163863  0.039389"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped_pct.agg([('foo','mean'),('bar',np.std)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "functionss = ['count','mean','max']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = grouped['tip_pct','total_bill'].agg(functionss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">tip_pct</th>\n",
       "      <th colspan=\"3\" halign=\"left\">total_bill</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>4</td>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.187735</td>\n",
       "      <td>4</td>\n",
       "      <td>18.420000</td>\n",
       "      <td>22.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>15</td>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.263480</td>\n",
       "      <td>15</td>\n",
       "      <td>16.813333</td>\n",
       "      <td>40.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>45</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.291990</td>\n",
       "      <td>45</td>\n",
       "      <td>19.661778</td>\n",
       "      <td>48.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>42</td>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.325733</td>\n",
       "      <td>42</td>\n",
       "      <td>21.276667</td>\n",
       "      <td>50.81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>57</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.252672</td>\n",
       "      <td>57</td>\n",
       "      <td>20.506667</td>\n",
       "      <td>48.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>19</td>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.710345</td>\n",
       "      <td>19</td>\n",
       "      <td>24.120000</td>\n",
       "      <td>45.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>45</td>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.266312</td>\n",
       "      <td>45</td>\n",
       "      <td>17.113111</td>\n",
       "      <td>41.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>17</td>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.241255</td>\n",
       "      <td>17</td>\n",
       "      <td>19.190588</td>\n",
       "      <td>43.11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            tip_pct                     total_bill                  \n",
       "              count      mean       max      count       mean    max\n",
       "day  smoker                                                         \n",
       "Fri  No           4  0.151650  0.187735          4  18.420000  22.75\n",
       "     Yes         15  0.174783  0.263480         15  16.813333  40.17\n",
       "Sat  No          45  0.158048  0.291990         45  19.661778  48.33\n",
       "     Yes         42  0.147906  0.325733         42  21.276667  50.81\n",
       "Sun  No          57  0.160113  0.252672         57  20.506667  48.17\n",
       "     Yes         19  0.187250  0.710345         19  24.120000  45.35\n",
       "Thur No          45  0.160298  0.266312         45  17.113111  41.19\n",
       "     Yes         17  0.163863  0.241255         17  19.190588  43.11"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>4</td>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.187735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>15</td>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.263480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>45</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.291990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>42</td>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.325733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>57</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.252672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>19</td>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.710345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>45</td>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.266312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>17</td>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.241255</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             count      mean       max\n",
       "day  smoker                           \n",
       "Fri  No          4  0.151650  0.187735\n",
       "     Yes        15  0.174783  0.263480\n",
       "Sat  No         45  0.158048  0.291990\n",
       "     Yes        42  0.147906  0.325733\n",
       "Sun  No         57  0.160113  0.252672\n",
       "     Yes        19  0.187250  0.710345\n",
       "Thur No         45  0.160298  0.266312\n",
       "     Yes        17  0.163863  0.241255"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result['tip_pct']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "ftuples = [('Durch','mean'),('Abwei',np.var)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">tip_pct</th>\n",
       "      <th colspan=\"2\" halign=\"left\">total_bill</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Durch</th>\n",
       "      <th>Abwei</th>\n",
       "      <th>Durch</th>\n",
       "      <th>Abwei</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.000791</td>\n",
       "      <td>18.420000</td>\n",
       "      <td>25.596333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.002631</td>\n",
       "      <td>16.813333</td>\n",
       "      <td>82.562438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.001581</td>\n",
       "      <td>19.661778</td>\n",
       "      <td>79.908965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.003767</td>\n",
       "      <td>21.276667</td>\n",
       "      <td>101.387535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.001793</td>\n",
       "      <td>20.506667</td>\n",
       "      <td>66.099980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.023757</td>\n",
       "      <td>24.120000</td>\n",
       "      <td>109.046044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.001503</td>\n",
       "      <td>17.113111</td>\n",
       "      <td>59.625081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.001551</td>\n",
       "      <td>19.190588</td>\n",
       "      <td>69.808518</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              tip_pct           total_bill            \n",
       "                Durch     Abwei      Durch       Abwei\n",
       "day  smoker                                           \n",
       "Fri  No      0.151650  0.000791  18.420000   25.596333\n",
       "     Yes     0.174783  0.002631  16.813333   82.562438\n",
       "Sat  No      0.158048  0.001581  19.661778   79.908965\n",
       "     Yes     0.147906  0.003767  21.276667  101.387535\n",
       "Sun  No      0.160113  0.001793  20.506667   66.099980\n",
       "     Yes     0.187250  0.023757  24.120000  109.046044\n",
       "Thur No      0.160298  0.001503  17.113111   59.625081\n",
       "     Yes     0.163863  0.001551  19.190588   69.808518"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped['tip_pct','total_bill'].agg(ftuples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>tip</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>3.50</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>4.73</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>9.00</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>10.00</td>\n",
       "      <td>104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>6.00</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>6.50</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>6.70</td>\n",
       "      <td>112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>5.00</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               tip  size\n",
       "day  smoker             \n",
       "Fri  No       3.50     9\n",
       "     Yes      4.73    31\n",
       "Sat  No       9.00   115\n",
       "     Yes     10.00   104\n",
       "Sun  No       6.00   167\n",
       "     Yes      6.50    49\n",
       "Thur No       6.70   112\n",
       "     Yes      5.00    40"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.agg({'tip':np.max,'size':'sum'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">tip_pct</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>0.120385</td>\n",
       "      <td>0.187735</td>\n",
       "      <td>0.151650</td>\n",
       "      <td>0.028123</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.103555</td>\n",
       "      <td>0.263480</td>\n",
       "      <td>0.174783</td>\n",
       "      <td>0.051293</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>0.056797</td>\n",
       "      <td>0.291990</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.039767</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.035638</td>\n",
       "      <td>0.325733</td>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.061375</td>\n",
       "      <td>104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>0.059447</td>\n",
       "      <td>0.252672</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.042347</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.065660</td>\n",
       "      <td>0.710345</td>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.154134</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>0.072961</td>\n",
       "      <td>0.266312</td>\n",
       "      <td>0.160298</td>\n",
       "      <td>0.038774</td>\n",
       "      <td>112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.090014</td>\n",
       "      <td>0.241255</td>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.039389</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              tip_pct                               size\n",
       "                  min       max      mean       std  sum\n",
       "day  smoker                                             \n",
       "Fri  No      0.120385  0.187735  0.151650  0.028123    9\n",
       "     Yes     0.103555  0.263480  0.174783  0.051293   31\n",
       "Sat  No      0.056797  0.291990  0.158048  0.039767  115\n",
       "     Yes     0.035638  0.325733  0.147906  0.061375  104\n",
       "Sun  No      0.059447  0.252672  0.160113  0.042347  167\n",
       "     Yes     0.065660  0.710345  0.187250  0.154134   49\n",
       "Thur No      0.072961  0.266312  0.160298  0.038774  112\n",
       "     Yes     0.090014  0.241255  0.163863  0.039389   40"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.agg(dict(tip_pct=['min','max','mean','std'],size='sum'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Fri</td>\n",
       "      <td>No</td>\n",
       "      <td>18.420000</td>\n",
       "      <td>2.812500</td>\n",
       "      <td>2.250000</td>\n",
       "      <td>0.151650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Fri</td>\n",
       "      <td>Yes</td>\n",
       "      <td>16.813333</td>\n",
       "      <td>2.714000</td>\n",
       "      <td>2.066667</td>\n",
       "      <td>0.174783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sat</td>\n",
       "      <td>No</td>\n",
       "      <td>19.661778</td>\n",
       "      <td>3.102889</td>\n",
       "      <td>2.555556</td>\n",
       "      <td>0.158048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sat</td>\n",
       "      <td>Yes</td>\n",
       "      <td>21.276667</td>\n",
       "      <td>2.875476</td>\n",
       "      <td>2.476190</td>\n",
       "      <td>0.147906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sun</td>\n",
       "      <td>No</td>\n",
       "      <td>20.506667</td>\n",
       "      <td>3.167895</td>\n",
       "      <td>2.929825</td>\n",
       "      <td>0.160113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Sun</td>\n",
       "      <td>Yes</td>\n",
       "      <td>24.120000</td>\n",
       "      <td>3.516842</td>\n",
       "      <td>2.578947</td>\n",
       "      <td>0.187250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Thur</td>\n",
       "      <td>No</td>\n",
       "      <td>17.113111</td>\n",
       "      <td>2.673778</td>\n",
       "      <td>2.488889</td>\n",
       "      <td>0.160298</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Thur</td>\n",
       "      <td>Yes</td>\n",
       "      <td>19.190588</td>\n",
       "      <td>3.030000</td>\n",
       "      <td>2.352941</td>\n",
       "      <td>0.163863</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    day smoker  total_bill       tip      size   tip_pct\n",
       "0   Fri     No   18.420000  2.812500  2.250000  0.151650\n",
       "1   Fri    Yes   16.813333  2.714000  2.066667  0.174783\n",
       "2   Sat     No   19.661778  3.102889  2.555556  0.158048\n",
       "3   Sat    Yes   21.276667  2.875476  2.476190  0.147906\n",
       "4   Sun     No   20.506667  3.167895  2.929825  0.160113\n",
       "5   Sun    Yes   24.120000  3.516842  2.578947  0.187250\n",
       "6  Thur     No   17.113111  2.673778  2.488889  0.160298\n",
       "7  Thur    Yes   19.190588  3.030000  2.352941  0.163863"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.groupby(['day','smoker'],as_index=False).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "def top(df:pd.DataFrame,n=5,column='tip_pct'):\n",
    "    return df.sort_values(by=column,ascending=False)[:n]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>7.25</td>\n",
       "      <td>5.15</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.710345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>9.60</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.416667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>3.07</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "      <td>0.325733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>11.61</td>\n",
       "      <td>3.39</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.291990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>23.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.280535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>14.31</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.279525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip smoker  day    time  size   tip_pct\n",
       "172        7.25  5.15    Yes  Sun  Dinner     2  0.710345\n",
       "178        9.60  4.00    Yes  Sun  Dinner     2  0.416667\n",
       "67         3.07  1.00    Yes  Sat  Dinner     1  0.325733\n",
       "232       11.61  3.39     No  Sat  Dinner     2  0.291990\n",
       "183       23.17  6.50    Yes  Sun  Dinner     4  0.280535\n",
       "109       14.31  4.00    Yes  Sat  Dinner     2  0.279525"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top(tips,6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">No</th>\n",
       "      <th>232</th>\n",
       "      <td>11.61</td>\n",
       "      <td>3.39</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.291990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>7.51</td>\n",
       "      <td>2.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>2</td>\n",
       "      <td>0.266312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>10.29</td>\n",
       "      <td>2.60</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.252672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>185</th>\n",
       "      <td>20.69</td>\n",
       "      <td>5.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>5</td>\n",
       "      <td>0.241663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>24.71</td>\n",
       "      <td>5.85</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>2</td>\n",
       "      <td>0.236746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">Yes</th>\n",
       "      <th>172</th>\n",
       "      <td>7.25</td>\n",
       "      <td>5.15</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.710345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>9.60</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.416667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>3.07</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "      <td>0.325733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>23.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.280535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>14.31</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.279525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            total_bill   tip smoker   day    time  size   tip_pct\n",
       "smoker                                                           \n",
       "No     232       11.61  3.39     No   Sat  Dinner     2  0.291990\n",
       "       149        7.51  2.00     No  Thur   Lunch     2  0.266312\n",
       "       51        10.29  2.60     No   Sun  Dinner     2  0.252672\n",
       "       185       20.69  5.00     No   Sun  Dinner     5  0.241663\n",
       "       88        24.71  5.85     No  Thur   Lunch     2  0.236746\n",
       "Yes    172        7.25  5.15    Yes   Sun  Dinner     2  0.710345\n",
       "       178        9.60  4.00    Yes   Sun  Dinner     2  0.416667\n",
       "       67         3.07  1.00    Yes   Sat  Dinner     1  0.325733\n",
       "       183       23.17  6.50    Yes   Sun  Dinner     4  0.280535\n",
       "       109       14.31  4.00    Yes   Sat  Dinner     2  0.279525"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.groupby('smoker').apply(top)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">No</th>\n",
       "      <th>Fri</th>\n",
       "      <th>94</th>\n",
       "      <td>22.75</td>\n",
       "      <td>3.25</td>\n",
       "      <td>No</td>\n",
       "      <td>Fri</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.142857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <th>212</th>\n",
       "      <td>48.33</td>\n",
       "      <td>9.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.186220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <th>156</th>\n",
       "      <td>48.17</td>\n",
       "      <td>5.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>6</td>\n",
       "      <td>0.103799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <th>142</th>\n",
       "      <td>41.19</td>\n",
       "      <td>5.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>5</td>\n",
       "      <td>0.121389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Yes</th>\n",
       "      <th>Fri</th>\n",
       "      <th>95</th>\n",
       "      <td>40.17</td>\n",
       "      <td>4.73</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Fri</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.117750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <th>170</th>\n",
       "      <td>50.81</td>\n",
       "      <td>10.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.196812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <th>182</th>\n",
       "      <td>45.35</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.077178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <th>197</th>\n",
       "      <td>43.11</td>\n",
       "      <td>5.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>4</td>\n",
       "      <td>0.115982</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 total_bill    tip smoker   day    time  size   tip_pct\n",
       "smoker day                                                             \n",
       "No     Fri  94        22.75   3.25     No   Fri  Dinner     2  0.142857\n",
       "       Sat  212       48.33   9.00     No   Sat  Dinner     4  0.186220\n",
       "       Sun  156       48.17   5.00     No   Sun  Dinner     6  0.103799\n",
       "       Thur 142       41.19   5.00     No  Thur   Lunch     5  0.121389\n",
       "Yes    Fri  95        40.17   4.73    Yes   Fri  Dinner     4  0.117750\n",
       "       Sat  170       50.81  10.00    Yes   Sat  Dinner     3  0.196812\n",
       "       Sun  182       45.35   3.50    Yes   Sun  Dinner     3  0.077178\n",
       "       Thur 197       43.11   5.00    Yes  Thur   Lunch     4  0.115982"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.groupby(['smoker','day']).apply(top,n=1,column='total_bill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = tips.groupby('smoker')['tip_pct'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <td>151.0</td>\n",
       "      <td>0.159328</td>\n",
       "      <td>0.039910</td>\n",
       "      <td>0.056797</td>\n",
       "      <td>0.136906</td>\n",
       "      <td>0.155625</td>\n",
       "      <td>0.185014</td>\n",
       "      <td>0.291990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>93.0</td>\n",
       "      <td>0.163196</td>\n",
       "      <td>0.085119</td>\n",
       "      <td>0.035638</td>\n",
       "      <td>0.106771</td>\n",
       "      <td>0.153846</td>\n",
       "      <td>0.195059</td>\n",
       "      <td>0.710345</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        count      mean       std       min       25%       50%       75%  \\\n",
       "smoker                                                                      \n",
       "No      151.0  0.159328  0.039910  0.056797  0.136906  0.155625  0.185014   \n",
       "Yes      93.0  0.163196  0.085119  0.035638  0.106771  0.153846  0.195059   \n",
       "\n",
       "             max  \n",
       "smoker            \n",
       "No      0.291990  \n",
       "Yes     0.710345  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "       smoker\n",
       "count  No        151.000000\n",
       "       Yes        93.000000\n",
       "mean   No          0.159328\n",
       "       Yes         0.163196\n",
       "std    No          0.039910\n",
       "       Yes         0.085119\n",
       "min    No          0.056797\n",
       "       Yes         0.035638\n",
       "25%    No          0.136906\n",
       "       Yes         0.106771\n",
       "50%    No          0.155625\n",
       "       Yes         0.153846\n",
       "75%    No          0.185014\n",
       "       Yes         0.195059\n",
       "max    No          0.291990\n",
       "       Yes         0.710345\n",
       "dtype: float64"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>11.61</td>\n",
       "      <td>3.39</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.291990</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>7.51</td>\n",
       "      <td>2.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>2</td>\n",
       "      <td>0.266312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>10.29</td>\n",
       "      <td>2.60</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.252672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>185</th>\n",
       "      <td>20.69</td>\n",
       "      <td>5.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>5</td>\n",
       "      <td>0.241663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>24.71</td>\n",
       "      <td>5.85</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>2</td>\n",
       "      <td>0.236746</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>7.25</td>\n",
       "      <td>5.15</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.710345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>9.60</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.416667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>3.07</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "      <td>0.325733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>23.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.280535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>14.31</td>\n",
       "      <td>4.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.279525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip smoker   day    time  size   tip_pct\n",
       "232       11.61  3.39     No   Sat  Dinner     2  0.291990\n",
       "149        7.51  2.00     No  Thur   Lunch     2  0.266312\n",
       "51        10.29  2.60     No   Sun  Dinner     2  0.252672\n",
       "185       20.69  5.00     No   Sun  Dinner     5  0.241663\n",
       "88        24.71  5.85     No  Thur   Lunch     2  0.236746\n",
       "172        7.25  5.15    Yes   Sun  Dinner     2  0.710345\n",
       "178        9.60  4.00    Yes   Sun  Dinner     2  0.416667\n",
       "67         3.07  1.00    Yes   Sat  Dinner     1  0.325733\n",
       "183       23.17  6.50    Yes   Sun  Dinner     4  0.280535\n",
       "109       14.31  4.00    Yes   Sat  Dinner     2  0.279525"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.groupby('smoker',group_keys=False).apply(top)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [82]: frame = pd.DataFrame({'data1': np.random.randn(1000),\n",
    "....: 'data2': np.random.randn(1000)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "quartiles = pd.cut(frame.data1,4,labels=list('ABCD'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    D\n",
       "1    C\n",
       "2    B\n",
       "3    C\n",
       "4    C\n",
       "5    B\n",
       "6    C\n",
       "7    C\n",
       "8    C\n",
       "9    C\n",
       "Name: data1, dtype: category\n",
       "Categories (4, object): [A < B < C < D]"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quartiles[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [85]: def get_stats(group):\n",
    "....:     return {'min': group.min(), 'max': group.max(),\n",
    "....: 'count': group.count(), 'mean': group.mean()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = frame.data2.groupby(quartiles)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>data1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A</th>\n",
       "      <td>74.0</td>\n",
       "      <td>2.577720</td>\n",
       "      <td>0.063620</td>\n",
       "      <td>-2.354825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>495.0</td>\n",
       "      <td>3.962488</td>\n",
       "      <td>-0.016237</td>\n",
       "      <td>-2.708933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C</th>\n",
       "      <td>386.0</td>\n",
       "      <td>3.224489</td>\n",
       "      <td>0.034423</td>\n",
       "      <td>-2.669906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>D</th>\n",
       "      <td>45.0</td>\n",
       "      <td>2.510735</td>\n",
       "      <td>0.160194</td>\n",
       "      <td>-3.053462</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       count       max      mean       min\n",
       "data1                                     \n",
       "A       74.0  2.577720  0.063620 -2.354825\n",
       "B      495.0  3.962488 -0.016237 -2.708933\n",
       "C      386.0  3.224489  0.034423 -2.669906\n",
       "D       45.0  2.510735  0.160194 -3.053462"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.apply(get_stats).unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouping = pd.qcut(frame.data1,10,labels=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>data1</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.577720</td>\n",
       "      <td>0.051775</td>\n",
       "      <td>-2.354825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.424165</td>\n",
       "      <td>0.083315</td>\n",
       "      <td>-2.458991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100.0</td>\n",
       "      <td>3.962488</td>\n",
       "      <td>-0.073071</td>\n",
       "      <td>-2.708933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.649126</td>\n",
       "      <td>-0.012143</td>\n",
       "      <td>-2.425769</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.459466</td>\n",
       "      <td>-0.068484</td>\n",
       "      <td>-2.510648</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.785451</td>\n",
       "      <td>0.041796</td>\n",
       "      <td>-2.442759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>100.0</td>\n",
       "      <td>3.224489</td>\n",
       "      <td>0.065130</td>\n",
       "      <td>-2.669906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.933665</td>\n",
       "      <td>-0.123154</td>\n",
       "      <td>-2.162822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.458618</td>\n",
       "      <td>0.080460</td>\n",
       "      <td>-2.487287</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>100.0</td>\n",
       "      <td>2.712277</td>\n",
       "      <td>0.126042</td>\n",
       "      <td>-3.053462</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       count       max      mean       min\n",
       "data1                                     \n",
       "0      100.0  2.577720  0.051775 -2.354825\n",
       "1      100.0  2.424165  0.083315 -2.458991\n",
       "2      100.0  3.962488 -0.073071 -2.708933\n",
       "3      100.0  2.649126 -0.012143 -2.425769\n",
       "4      100.0  2.459466 -0.068484 -2.510648\n",
       "5      100.0  2.785451  0.041796 -2.442759\n",
       "6      100.0  3.224489  0.065130 -2.669906\n",
       "7      100.0  2.933665 -0.123154 -2.162822\n",
       "8      100.0  2.458618  0.080460 -2.487287\n",
       "9      100.0  2.712277  0.126042 -3.053462"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped = frame.data2.groupby(grouping)\n",
    "grouped.apply(get_stats).unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [91]: s = pd.Series(np.random.randn(6))\n",
    "In [92]: s[::2] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         NaN\n",
       "1   -0.898817\n",
       "2         NaN\n",
       "3    1.174494\n",
       "4         NaN\n",
       "5    0.399335\n",
       "dtype: float64"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.225004\n",
       "1   -0.898817\n",
       "2    0.225004\n",
       "3    1.174494\n",
       "4    0.225004\n",
       "5    0.399335\n",
       "dtype: float64"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.fillna(s.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [95]: states = ['Ohio', 'New York', 'Vermont', 'Florida',\n",
    "....: 'Oregon', 'Nevada', 'California', 'Idaho']\n",
    "In [96]: group_key = ['East'] * 4 + ['West'] * 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [97]: data = pd.Series(np.random.randn(8), index=states)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio         -0.030390\n",
       "New York     -0.315399\n",
       "Vermont      -0.528030\n",
       "Florida      -0.970926\n",
       "Oregon       -2.404301\n",
       "Nevada       -0.464205\n",
       "California   -0.185434\n",
       "Idaho         0.802551\n",
       "dtype: float64"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[['Vermont', 'Nevada', 'Idaho']] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio         -0.030390\n",
       "New York     -0.315399\n",
       "Vermont            NaN\n",
       "Florida      -0.970926\n",
       "Oregon       -2.404301\n",
       "Nevada             NaN\n",
       "California   -0.185434\n",
       "Idaho              NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "East   -0.438905\n",
       "West   -1.294868\n",
       "dtype: float64"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.groupby(group_key).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_mean = lambda g:g.fillna(g.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio         -0.030390\n",
       "New York     -0.315399\n",
       "Vermont      -0.438905\n",
       "Florida      -0.970926\n",
       "Oregon       -2.404301\n",
       "Nevada       -1.294868\n",
       "California   -0.185434\n",
       "Idaho        -1.294868\n",
       "dtype: float64"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.groupby(group_key).apply(fill_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_values = {'East':0.5,'West':-1}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "fill_func = lambda g:g.fillna(fill_values[g.name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio         -0.030390\n",
       "New York     -0.315399\n",
       "Vermont       0.500000\n",
       "Florida      -0.970926\n",
       "Oregon       -2.404301\n",
       "Nevada       -1.000000\n",
       "California   -0.185434\n",
       "Idaho        -1.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.groupby(group_key).apply(fill_func)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "suits = ['H', 'S', 'C', 'D']\n",
    "card_val = (list(range(1, 11)) + [10] * 3) * 4\n",
    "base_names = ['A'] + list(range(2, 11)) + ['J', 'K', 'Q']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "cards=[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "for suit in suits:\n",
    "    cards.extend(str(num)+suit for num in base_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AH',\n",
       " '2H',\n",
       " '3H',\n",
       " '4H',\n",
       " '5H',\n",
       " '6H',\n",
       " '7H',\n",
       " '8H',\n",
       " '9H',\n",
       " '10H',\n",
       " 'JH',\n",
       " 'KH',\n",
       " 'QH',\n",
       " 'AS',\n",
       " '2S',\n",
       " '3S',\n",
       " '4S',\n",
       " '5S',\n",
       " '6S',\n",
       " '7S',\n",
       " '8S',\n",
       " '9S',\n",
       " '10S',\n",
       " 'JS',\n",
       " 'KS',\n",
       " 'QS',\n",
       " 'AC',\n",
       " '2C',\n",
       " '3C',\n",
       " '4C',\n",
       " '5C',\n",
       " '6C',\n",
       " '7C',\n",
       " '8C',\n",
       " '9C',\n",
       " '10C',\n",
       " 'JC',\n",
       " 'KC',\n",
       " 'QC',\n",
       " 'AD',\n",
       " '2D',\n",
       " '3D',\n",
       " '4D',\n",
       " '5D',\n",
       " '6D',\n",
       " '7D',\n",
       " '8D',\n",
       " '9D',\n",
       " '10D',\n",
       " 'JD',\n",
       " 'KD',\n",
       " 'QD']"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "deck = pd.Series(card_val,index=cards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AH      1\n",
       "2H      2\n",
       "3H      3\n",
       "4H      4\n",
       "5H      5\n",
       "6H      6\n",
       "7H      7\n",
       "8H      8\n",
       "9H      9\n",
       "10H    10\n",
       "JH     10\n",
       "KH     10\n",
       "QH     10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deck[:13]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "def draw(deck,n=5):\n",
    "    return deck.sample(n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7C      7\n",
       "10S    10\n",
       "10C    10\n",
       "JH     10\n",
       "7S      7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "draw(deck)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_suit = lambda card:card[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C  8C     8\n",
       "   4C     4\n",
       "D  JD    10\n",
       "   9D     9\n",
       "H  9H     9\n",
       "   6H     6\n",
       "S  4S     4\n",
       "   9S     9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deck.groupby(get_suit).apply(draw,n=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "In [114]: df = pd.DataFrame({'category': ['a', 'a', 'a', 'a',\n",
    ".....: 'b', 'b', 'b', 'b'],\n",
    ".....: 'data': np.random.randn(8),\n",
    ".....: 'weights': np.random.rand(8)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>data</th>\n",
       "      <th>weights</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>0.719916</td>\n",
       "      <td>0.738588</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>-0.143945</td>\n",
       "      <td>0.743668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>-0.772511</td>\n",
       "      <td>0.757915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a</td>\n",
       "      <td>0.544861</td>\n",
       "      <td>0.813104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>b</td>\n",
       "      <td>0.172390</td>\n",
       "      <td>0.981232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>b</td>\n",
       "      <td>0.200872</td>\n",
       "      <td>0.290250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>b</td>\n",
       "      <td>0.103508</td>\n",
       "      <td>0.863467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>b</td>\n",
       "      <td>-0.405674</td>\n",
       "      <td>0.317384</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  category      data   weights\n",
       "0        a  0.719916  0.738588\n",
       "1        a -0.143945  0.743668\n",
       "2        a -0.772511  0.757915\n",
       "3        a  0.544861  0.813104\n",
       "4        b  0.172390  0.981232\n",
       "5        b  0.200872  0.290250\n",
       "6        b  0.103508  0.863467\n",
       "7        b -0.405674  0.317384"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = df.groupby('category')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_wavg = lambda g:np.average(g['data'],weights=g['weights'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "category\n",
       "a    0.092427\n",
       "b    0.076694\n",
       "dtype: float64"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.apply(get_wavg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14\n",
      "Data columns (total 4 columns):\n",
      "AAPL    2214 non-null float64\n",
      "MSFT    2214 non-null float64\n",
      "XOM     2214 non-null float64\n",
      "SPX     2214 non-null float64\n",
      "dtypes: float64(4)\n",
      "memory usage: 86.5 KB\n"
     ]
    }
   ],
   "source": [
    "In [119]: close_px = pd.read_csv('examples/stock_px_2.csv',\n",
    "parse_dates=True,\n",
    ".....: index_col=0)\n",
    "In [120]: close_px.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.059447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.160542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.166587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.139780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.146808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>25.29</td>\n",
       "      <td>4.71</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.186240</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   total_bill   tip smoker  day    time  size   tip_pct\n",
       "0       16.99  1.01     No  Sun  Dinner     2  0.059447\n",
       "1       10.34  1.66     No  Sun  Dinner     3  0.160542\n",
       "2       21.01  3.50     No  Sun  Dinner     3  0.166587\n",
       "3       23.68  3.31     No  Sun  Dinner     2  0.139780\n",
       "4       24.59  3.61     No  Sun  Dinner     4  0.146808\n",
       "5       25.29  4.71     No  Sun  Dinner     4  0.186240"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "In [57]: tips = pd.read_csv('examples/tips.csv')\n",
    "# Add tip percentage of total bill\n",
    "In [58]: tips['tip_pct'] = tips['tip'] / tips['total_bill']\n",
    "In [59]: tips[:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>size</th>\n",
       "      <th>tip</th>\n",
       "      <th>tip_pct</th>\n",
       "      <th>total_bill</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Fri</th>\n",
       "      <th>No</th>\n",
       "      <td>2.250000</td>\n",
       "      <td>2.812500</td>\n",
       "      <td>0.151650</td>\n",
       "      <td>18.420000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>2.066667</td>\n",
       "      <td>2.714000</td>\n",
       "      <td>0.174783</td>\n",
       "      <td>16.813333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sat</th>\n",
       "      <th>No</th>\n",
       "      <td>2.555556</td>\n",
       "      <td>3.102889</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>19.661778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>2.476190</td>\n",
       "      <td>2.875476</td>\n",
       "      <td>0.147906</td>\n",
       "      <td>21.276667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Sun</th>\n",
       "      <th>No</th>\n",
       "      <td>2.929825</td>\n",
       "      <td>3.167895</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>20.506667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>2.578947</td>\n",
       "      <td>3.516842</td>\n",
       "      <td>0.187250</td>\n",
       "      <td>24.120000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Thur</th>\n",
       "      <th>No</th>\n",
       "      <td>2.488889</td>\n",
       "      <td>2.673778</td>\n",
       "      <td>0.160298</td>\n",
       "      <td>17.113111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>2.352941</td>\n",
       "      <td>3.030000</td>\n",
       "      <td>0.163863</td>\n",
       "      <td>19.190588</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 size       tip   tip_pct  total_bill\n",
       "day  smoker                                          \n",
       "Fri  No      2.250000  2.812500  0.151650   18.420000\n",
       "     Yes     2.066667  2.714000  0.174783   16.813333\n",
       "Sat  No      2.555556  3.102889  0.158048   19.661778\n",
       "     Yes     2.476190  2.875476  0.147906   21.276667\n",
       "Sun  No      2.929825  3.167895  0.160113   20.506667\n",
       "     Yes     2.578947  3.516842  0.187250   24.120000\n",
       "Thur No      2.488889  2.673778  0.160298   17.113111\n",
       "     Yes     2.352941  3.030000  0.163863   19.190588"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.pivot_table(index=['day','smoker'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">size</th>\n",
       "      <th colspan=\"2\" halign=\"left\">tip_pct</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Dinner</th>\n",
       "      <th>Fri</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.222222</td>\n",
       "      <td>0.139622</td>\n",
       "      <td>0.165347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>2.555556</td>\n",
       "      <td>2.476190</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.147906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>2.929825</td>\n",
       "      <td>2.578947</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.187250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.159744</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Lunch</th>\n",
       "      <th>Fri</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.833333</td>\n",
       "      <td>0.187735</td>\n",
       "      <td>0.188937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>2.500000</td>\n",
       "      <td>2.352941</td>\n",
       "      <td>0.160311</td>\n",
       "      <td>0.163863</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 size             tip_pct          \n",
       "smoker             No       Yes        No       Yes\n",
       "time   day                                         \n",
       "Dinner Fri   2.000000  2.222222  0.139622  0.165347\n",
       "       Sat   2.555556  2.476190  0.158048  0.147906\n",
       "       Sun   2.929825  2.578947  0.160113  0.187250\n",
       "       Thur  2.000000       NaN  0.159744       NaN\n",
       "Lunch  Fri   3.000000  1.833333  0.187735  0.188937\n",
       "       Thur  2.500000  2.352941  0.160311  0.163863"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.pivot_table(['size','tip_pct'],index=['time','day'],columns=['smoker'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "tips.pivot_table?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">size</th>\n",
       "      <th colspan=\"3\" halign=\"left\">tip_pct</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "      <th>All</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Dinner</th>\n",
       "      <th>Fri</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.222222</td>\n",
       "      <td>2.166667</td>\n",
       "      <td>0.139622</td>\n",
       "      <td>0.165347</td>\n",
       "      <td>0.158916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>2.555556</td>\n",
       "      <td>2.476190</td>\n",
       "      <td>2.517241</td>\n",
       "      <td>0.158048</td>\n",
       "      <td>0.147906</td>\n",
       "      <td>0.153152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>2.929825</td>\n",
       "      <td>2.578947</td>\n",
       "      <td>2.842105</td>\n",
       "      <td>0.160113</td>\n",
       "      <td>0.187250</td>\n",
       "      <td>0.166897</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.159744</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.159744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Lunch</th>\n",
       "      <th>Fri</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.833333</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.187735</td>\n",
       "      <td>0.188937</td>\n",
       "      <td>0.188765</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>2.500000</td>\n",
       "      <td>2.352941</td>\n",
       "      <td>2.459016</td>\n",
       "      <td>0.160311</td>\n",
       "      <td>0.163863</td>\n",
       "      <td>0.161301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <th></th>\n",
       "      <td>2.668874</td>\n",
       "      <td>2.408602</td>\n",
       "      <td>2.569672</td>\n",
       "      <td>0.159328</td>\n",
       "      <td>0.163196</td>\n",
       "      <td>0.160803</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 size                       tip_pct                    \n",
       "smoker             No       Yes       All        No       Yes       All\n",
       "time   day                                                             \n",
       "Dinner Fri   2.000000  2.222222  2.166667  0.139622  0.165347  0.158916\n",
       "       Sat   2.555556  2.476190  2.517241  0.158048  0.147906  0.153152\n",
       "       Sun   2.929825  2.578947  2.842105  0.160113  0.187250  0.166897\n",
       "       Thur  2.000000       NaN  2.000000  0.159744       NaN  0.159744\n",
       "Lunch  Fri   3.000000  1.833333  2.000000  0.187735  0.188937  0.188765\n",
       "       Thur  2.500000  2.352941  2.459016  0.160311  0.163863  0.161301\n",
       "All          2.668874  2.408602  2.569672  0.159328  0.163196  0.160803"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.pivot_table(['size','tip_pct'],index=['time','day'],columns=['smoker'],margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>day</th>\n",
       "      <th>Fri</th>\n",
       "      <th>Sat</th>\n",
       "      <th>Sun</th>\n",
       "      <th>Thur</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Dinner</th>\n",
       "      <th>No</th>\n",
       "      <td>3.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>106.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>9.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Lunch</th>\n",
       "      <th>No</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>44.0</td>\n",
       "      <td>45.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <th></th>\n",
       "      <td>19.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>244.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "day             Fri   Sat   Sun  Thur    All\n",
       "time   smoker                               \n",
       "Dinner No       3.0  45.0  57.0   1.0  106.0\n",
       "       Yes      9.0  42.0  19.0   NaN   70.0\n",
       "Lunch  No       1.0   NaN   NaN  44.0   45.0\n",
       "       Yes      6.0   NaN   NaN  17.0   23.0\n",
       "All            19.0  87.0  76.0  62.0  244.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.pivot_table('tip_pct',index=['time','smoker'],columns='day',aggfunc=len,margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>day</th>\n",
       "      <th>Fri</th>\n",
       "      <th>Sat</th>\n",
       "      <th>Sun</th>\n",
       "      <th>Thur</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>smoker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"11\" valign=\"top\">Dinner</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">1</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.137931</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.325733</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">2</th>\n",
       "      <th>No</th>\n",
       "      <td>0.139622</td>\n",
       "      <td>0.162705</td>\n",
       "      <td>0.168859</td>\n",
       "      <td>0.159744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.171297</td>\n",
       "      <td>0.148668</td>\n",
       "      <td>0.207893</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">3</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.154661</td>\n",
       "      <td>0.152663</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.144995</td>\n",
       "      <td>0.152660</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">4</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.150096</td>\n",
       "      <td>0.148143</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.117750</td>\n",
       "      <td>0.124515</td>\n",
       "      <td>0.193370</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">5</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.206928</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.106572</td>\n",
       "      <td>0.065660</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.103799</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">Lunch</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">1</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.181728</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.223776</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">2</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.181969</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.158843</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">3</th>\n",
       "      <th>No</th>\n",
       "      <td>0.187735</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.084246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.204952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">4</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.138919</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yes</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.155410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.121389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.173706</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "day                      Fri       Sat       Sun      Thur\n",
       "time   size smoker                                        \n",
       "Dinner 1    No      0.000000  0.137931  0.000000  0.000000\n",
       "            Yes     0.000000  0.325733  0.000000  0.000000\n",
       "       2    No      0.139622  0.162705  0.168859  0.159744\n",
       "            Yes     0.171297  0.148668  0.207893  0.000000\n",
       "       3    No      0.000000  0.154661  0.152663  0.000000\n",
       "            Yes     0.000000  0.144995  0.152660  0.000000\n",
       "       4    No      0.000000  0.150096  0.148143  0.000000\n",
       "            Yes     0.117750  0.124515  0.193370  0.000000\n",
       "       5    No      0.000000  0.000000  0.206928  0.000000\n",
       "            Yes     0.000000  0.106572  0.065660  0.000000\n",
       "       6    No      0.000000  0.000000  0.103799  0.000000\n",
       "Lunch  1    No      0.000000  0.000000  0.000000  0.181728\n",
       "            Yes     0.223776  0.000000  0.000000  0.000000\n",
       "       2    No      0.000000  0.000000  0.000000  0.166005\n",
       "            Yes     0.181969  0.000000  0.000000  0.158843\n",
       "       3    No      0.187735  0.000000  0.000000  0.084246\n",
       "            Yes     0.000000  0.000000  0.000000  0.204952\n",
       "       4    No      0.000000  0.000000  0.000000  0.138919\n",
       "            Yes     0.000000  0.000000  0.000000  0.155410\n",
       "       5    No      0.000000  0.000000  0.000000  0.121389\n",
       "       6    No      0.000000  0.000000  0.000000  0.173706"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tips.pivot_table('tip_pct',index=['time','size','smoker'],columns='day',\n",
    "                 aggfunc='mean',fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Dinner</th>\n",
       "      <th>Fri</th>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>45</td>\n",
       "      <td>42</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>57</td>\n",
       "      <td>19</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Lunch</th>\n",
       "      <th>Fri</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>44</td>\n",
       "      <td>17</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <th></th>\n",
       "      <td>151</td>\n",
       "      <td>93</td>\n",
       "      <td>244</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "smoker        No  Yes  All\n",
       "time   day                \n",
       "Dinner Fri     3    9   12\n",
       "       Sat    45   42   87\n",
       "       Sun    57   19   76\n",
       "       Thur    1    0    1\n",
       "Lunch  Fri     1    6    7\n",
       "       Thur   44   17   61\n",
       "All          151   93  244"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab([tips.time,tips.day],tips.smoker,margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
